
*Filename: 4a_analysis-sample-long.do
*Created: 20201023
*Last edited: 20210118 (added stratum, cluster, seo; removed SRC sample restriction) 

/*Description: 

	The full dataset with children linked to their parents (child-parent.dta)
	contains an observation for each unique individual, with all of their income, etc.
	data for all years in a WIDE format (with the wave number appended to the variable
	name). The parent data is also in a wide format, with the m_ or f_ prefix and the
	wave number appended to the variable name as well.
	
	The code in this file converts that dataset to a LONG dataset, so that each observation
	is at the child-year level (one row per individual child per wave). For a given
	child-year observation, the parents' data also correspond to that same year.
	
	A "year" variable is created to indicate the INCOME year (year prior to the survey).	
*/

******************************************************* 
* Session setup: start with no data in memory, turn off output paging, close
* any log left open by an earlier run (capture ignores the error when none is
* open), and raise the variable limit.
* NOTE(review): maxvar is presumably raised because the wide input file holds
* one column per variable per wave - confirm 32000 is still sufficient.
clear
//version 16.0
set more off
capture log close
set maxvar 32000


*Log file
* The path is quoted so that the command still parses when ${projdata}
* expands to a directory name containing spaces.
log using "${projdata}/analysis-sample-long.log", replace



*Number of waves to use
* Stored in a global; it is the upper bound of the second wave loop that
* assigns income years further down in this file.
global NW = 41


*Variables from raw data to use
* Each local holds a list of variable-name patterns that is later passed to
* the use command. In a Stata varlist, ? matches exactly one character and
* the asterisk matches zero or more characters, so a pair such as
* "age? age??" picks up names with a one- or two-character suffix.
* Patterns starting with ?_ pick up names carrying a one-character prefix
* (the file header describes m_ and f_ prefixes for parent variables).

* Income variables (own and prefixed versions), built up in pieces
local incomes "rinchd? rinchd?? rincwf? rincwf?? rfminc? rfminc??"
local incomes "`incomes' ?_rinchd? ?_rinchd?? ?_rincwf? ?_rincwf?? ?_rfminc? ?_rfminc??"
local incomes "`incomes' rearnbushd? rearnbushd?? rearnbuswf? rearnbuswf??"
local incomes "`incomes' ?_rearnbushd? ?_rearnbushd?? ?_rearnbuswf? ?_rearnbuswf??"
local incomes "`incomes' rfarminc? rfarminc?? ?_rfarminc? ?_rfarminc??"

* Variables prefixed F_ or S_ (own and prefixed versions)
local incomeacc "F_* S_* ?_F_* ?_S_*"

* Schooling variables
local occupschl "schlast schmax"

* Identifiers, sequence and relationship variables, weights, design variables
local idvars "newid id? id?? seq? seq?? rel? rel?? iwgt? iwgt?? famwgt? famwgt?? stratum cluster female src seo merge_parents"

* Age by wave and cohort
local agevars "age? age?? cohort"

* Parent identifiers and characteristics
local parents "?_newid ?_id* ?_rel* ?_seq* ?_age* ?_schmax ?_schlast ?_female ?_cohort merge*? "

* Hours variables (own and prefixed versions)
local lfstatus "hourshd? hourswf? hourshd?? hourswf??  ?_hourshd? ?_hourswf? ?_hourshd?? ?_hourswf??"


*Get raw data
* Load only the variables matched by the pattern lists defined earlier in
* this file, rather than the full wide dataset.
* The file path is quoted so the command still parses when ${projdata}
* expands to a directory name containing spaces.
* NOTE(review): the local famstruc is referenced here but is not defined
* anywhere in this file, so it expands to nothing. Confirm whether a
* family-structure variable list was meant to be loaded.
clear
use `incomes' `incomeacc' `occupschl' `idvars' `agevars' `parents' `famstruc' `lfstatus' using "${projdata}/child-parent.dta"


*Reshape long
* Convert from one row per newid (wide) to one row per newid-wave (long).
* The stub list is assembled from three shared groups so that the child,
* father (f_) and mother (m_) stubs are guaranteed to stay in step.

* Income stubs
local kid_inc "rinchd rincwf rfminc rearnbushd rearnbuswf rfarminc"

* F_ and S_ stubs
local acc_flags "F_inchd_top F_incwf_top F_fminc_top F_fminc_bot F_farminc_top F_farminc_bot"
local acc_flags "`acc_flags' S_inchd S_incwf S_nonlabinc S_frminc S_businc S_hourshd S_hourswf"

* Hours stubs
local hrs "hourshd hourswf"

* Child stubs.
* NOTE(review): head and wife are listed as stubs, but no head?/wife?
* pattern appears in the variable lists used to load the data - confirm
* these two stubs are still wanted.
local stubs "`kid_inc' age id rel seq head wife iwgt famwgt `acc_flags' `hrs'"

* Parent stubs: same groups with the f_ and then the m_ prefix
foreach p in f m {
	local stubs "`stubs' `p'_id `p'_rel `p'_seq `p'_age"
	foreach s in `kid_inc' `acc_flags' `hrs' {
		local stubs "`stubs' `p'_`s'"
	}
}

reshape long `stubs', i(newid) j(wave)

 	 
*Create variable for income year
* Map each wave number to a calendar year and echo the mapping to the log.
* Waves 1 to 30 advance one year per wave starting at 1967; waves 31 to $NW
* advance two years per wave starting at 1998.
generate year = .

* One year per wave: wave 1 gives 1967, wave 30 gives 1996
forvalues w = 1/30 {
	local incyr = 1966 + `w'
	replace year = `incyr' if wave == `w'
	display "`incyr' - WAVE `w' income"
}

* Two years per wave: wave 31 gives 1998, wave 32 gives 2000, and so on
forvalues w = 31/$NW {
	local incyr = 1996 + 2*(`w' - 30)
	replace year = `incyr' if wave == `w'
	display "`incyr' - WAVE `w' income"
}

*** SAVE SAMPLE (LONG)
* Compress storage types quietly, write the long dataset (overwriting any
* earlier copy), then clear memory and close the log opened at the top.
* The path is quoted so the command still parses when ${projdata} expands
* to a directory name containing spaces.
qui compress
save "${projdata}/analysis-sample-long.dta", replace

clear
log close

*End 4a_analysis-sample-long.do*
